{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Decision Trees for Classification Explained "
      ],
      "metadata": {}
    },
    {
      "cell_type": "markdown",
      "source": [
        "#### Build a decision tree classification model on historical stock price data."
      ],
      "metadata": {
        "inputHidden": false,
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# yahoo finance is used to fetch data \n",
        "import yfinance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:19.980Z",
          "iopub.status.busy": "2021-09-07T16:56:19.972Z",
          "iopub.status.idle": "2021-09-07T16:56:19.987Z",
          "shell.execute_reply": "2021-09-07T16:56:19.923Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "#### Getting the historical data from Yahoo Finance"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2012-01-01'\n",
        "end = '2021-09-05'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# View Columns\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 completed\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "            Open  High   Low  Close  Adj Close    Volume\nDate                                                    \n2012-01-03  5.53  5.59  5.44   5.48       5.48  12675100\n2012-01-04  5.47  5.49  5.41   5.46       5.46   8034900\n2012-01-05  5.45  5.57  5.35   5.46       5.46  11476900\n2012-01-06  5.44  5.52  5.39   5.43       5.43  12938600\n2012-01-09  5.42  5.60  5.38   5.59       5.59  12585400",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2012-01-03</th>\n      <td>5.53</td>\n      <td>5.59</td>\n      <td>5.44</td>\n      <td>5.48</td>\n      <td>5.48</td>\n      <td>12675100</td>\n    </tr>\n    <tr>\n      <th>2012-01-04</th>\n      <td>5.47</td>\n      <td>5.49</td>\n      <td>5.41</td>\n      <td>5.46</td>\n      <td>5.46</td>\n      <td>8034900</td>\n    </tr>\n    <tr>\n      <th>2012-01-05</th>\n      <td>5.45</td>\n      <td>5.57</td>\n      <td>5.35</td>\n      <td>5.46</td>\n      <td>5.46</td>\n      <td>11476900</td>\n    </tr>\n    <tr>\n      <th>2012-01-06</th>\n      <td>5.44</td>\n      <td>5.52</td>\n      <td>5.39</td>\n      <td>5.43</td>\n      <td>5.43</td>\n      <td>12938600</td>\n    </tr>\n    <tr>\n      <th>2012-01-09</th>\n      <td>5.42</td>\n      <td>5.60</td>\n      <td>5.38</td>\n      <td>5.59</td>\n      <td>5.59</td>\n      <td>12585400</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:20.002Z",
          "iopub.status.busy": "2021-09-07T16:56:19.996Z",
          "iopub.status.idle": "2021-09-07T16:56:20.494Z",
          "shell.execute_reply": "2021-09-07T16:56:20.685Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "#### Create more data"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Create more data\n",
        "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,-1)\n",
        "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,-1)\n",
        "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,-1)\n",
        "dataset['Return'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()\n",
        "dataset['Up_Down'] = np.where(dataset['Return'].shift(-1) > dataset['Return'],'Up','Down')\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/plain": "            Open  High   Low  Close  Adj Close    Volume  Increase_Decrease  \\\nDate                                                                          \n2012-01-04  5.47  5.49  5.41   5.46       5.46   8034900                  1   \n2012-01-05  5.45  5.57  5.35   5.46       5.46  11476900                  1   \n2012-01-06  5.44  5.52  5.39   5.43       5.43  12938600                 -1   \n2012-01-09  5.42  5.60  5.38   5.59       5.59  12585400                 -1   \n2012-01-10  5.66  5.75  5.64   5.71       5.71  10129600                  1   \n\n            Buy_Sell_on_Open  Buy_Sell    Return Up_Down  \nDate                                                      \n2012-01-04                -1        -1 -0.003650      Up  \n2012-01-05                -1        -1  0.000000    Down  \n2012-01-06                -1         1 -0.005495      Up  \n2012-01-09                 1         1  0.029466    Down  \n2012-01-10                 1         1  0.021467    Down  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Return</th>\n      <th>Up_Down</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2012-01-04</th>\n      <td>5.47</td>\n      <td>5.49</td>\n      <td>5.41</td>\n      <td>5.46</td>\n      <td>5.46</td>\n      <td>8034900</td>\n      <td>1</td>\n      <td>-1</td>\n      <td>-1</td>\n      <td>-0.003650</td>\n      <td>Up</td>\n    </tr>\n    <tr>\n      <th>2012-01-05</th>\n      <td>5.45</td>\n      <td>5.57</td>\n      <td>5.35</td>\n      <td>5.46</td>\n      <td>5.46</td>\n      <td>11476900</td>\n      <td>1</td>\n      <td>-1</td>\n      <td>-1</td>\n      <td>0.000000</td>\n      <td>Down</td>\n    </tr>\n    <tr>\n      <th>2012-01-06</th>\n      <td>5.44</td>\n      <td>5.52</td>\n      <td>5.39</td>\n      <td>5.43</td>\n      <td>5.43</td>\n      <td>12938600</td>\n      <td>-1</td>\n      <td>-1</td>\n      <td>1</td>\n      <td>-0.005495</td>\n      <td>Up</td>\n    </tr>\n    <tr>\n      <th>2012-01-09</th>\n      <td>5.42</td>\n      <td>5.60</td>\n      <td>5.38</td>\n      <td>5.59</td>\n      <td>5.59</td>\n      <td>12585400</td>\n      <td>-1</td>\n      <td>1</td>\n      <td>1</td>\n      
<td>0.029466</td>\n      <td>Down</td>\n    </tr>\n    <tr>\n      <th>2012-01-10</th>\n      <td>5.66</td>\n      <td>5.75</td>\n      <td>5.64</td>\n      <td>5.71</td>\n      <td>5.71</td>\n      <td>10129600</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.021467</td>\n      <td>Down</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:20.511Z",
          "iopub.status.busy": "2021-09-07T16:56:20.505Z",
          "iopub.status.idle": "2021-09-07T16:56:20.524Z",
          "shell.execute_reply": "2021-09-07T16:56:20.690Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset.shape"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "(2434, 11)"
          },
          "metadata": {}
        }
      ],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:20.539Z",
          "iopub.status.busy": "2021-09-07T16:56:20.533Z",
          "iopub.status.idle": "2021-09-07T16:56:20.553Z",
          "shell.execute_reply": "2021-09-07T16:56:20.695Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "predictors_list = ['Open', 'High','Low', 'Adj Close', 'Return', 'Volume']\n",
        "X = dataset[predictors_list]\n",
        "X.tail()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 5,
          "data": {
            "text/plain": "                  Open        High         Low   Adj Close    Return    Volume\nDate                                                                          \n2021-08-30  112.610001  114.489998  111.260002  111.320000 -0.000718  56130500\n2021-08-31  111.260002  111.260002  109.029999  110.720001 -0.005390  49339000\n2021-09-01  111.300003  111.849998  109.849998  109.989998 -0.006593  38579600\n2021-09-02  110.320000  110.879997  108.769997  109.199997 -0.007182  40090500\n2021-09-03  108.849998  111.169998  108.500000  109.919998  0.006593  42605800",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Adj Close</th>\n      <th>Return</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2021-08-30</th>\n      <td>112.610001</td>\n      <td>114.489998</td>\n      <td>111.260002</td>\n      <td>111.320000</td>\n      <td>-0.000718</td>\n      <td>56130500</td>\n    </tr>\n    <tr>\n      <th>2021-08-31</th>\n      <td>111.260002</td>\n      <td>111.260002</td>\n      <td>109.029999</td>\n      <td>110.720001</td>\n      <td>-0.005390</td>\n      <td>49339000</td>\n    </tr>\n    <tr>\n      <th>2021-09-01</th>\n      <td>111.300003</td>\n      <td>111.849998</td>\n      <td>109.849998</td>\n      <td>109.989998</td>\n      <td>-0.006593</td>\n      <td>38579600</td>\n    </tr>\n    <tr>\n      <th>2021-09-02</th>\n      <td>110.320000</td>\n      <td>110.879997</td>\n      <td>108.769997</td>\n      <td>109.199997</td>\n      <td>-0.007182</td>\n      <td>40090500</td>\n    </tr>\n    <tr>\n      <th>2021-09-03</th>\n      <td>108.849998</td>\n      <td>111.169998</td>\n      <td>108.500000</td>\n      <td>109.919998</td>\n      <td>0.006593</td>\n      <td>42605800</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T16:56:20.562Z",
          "iopub.execute_input": "2021-09-07T16:56:20.568Z",
          "iopub.status.idle": "2021-09-07T16:56:20.582Z",
          "shell.execute_reply": "2021-09-07T16:56:20.701Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "y = dataset['Buy_Sell']"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:20.599Z",
          "iopub.status.busy": "2021-09-07T16:56:20.592Z",
          "iopub.status.idle": "2021-09-07T16:56:20.611Z",
          "shell.execute_reply": "2021-09-07T16:56:20.706Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "y.name"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 30,
          "data": {
            "text/plain": "'Buy_Sell'"
          },
          "metadata": {}
        }
      ],
      "execution_count": 30,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T17:35:47.944Z",
          "iopub.execute_input": "2021-09-07T17:35:47.951Z",
          "iopub.status.idle": "2021-09-07T17:35:47.964Z",
          "shell.execute_reply": "2021-09-07T17:35:47.970Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X.columns"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 7,
          "data": {
            "text/plain": "Index(['Open', 'High', 'Low', 'Adj Close', 'Return', 'Volume'], dtype='object')"
          },
          "metadata": {}
        }
      ],
      "execution_count": 7,
      "metadata": {
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:20.631Z",
          "iopub.status.busy": "2021-09-07T16:56:20.623Z",
          "iopub.status.idle": "2021-09-07T16:56:20.646Z",
          "shell.execute_reply": "2021-09-07T16:56:20.710Z"
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "#### test_size\n",
        "The size of the test data set: in this case, 20% of the data is held out for testing and, therefore, 80% is used for training.\n",
        "#### random_state\n",
        "Since the sampling is random, this parameter allows us to reproduce the same randomness in each execution.\n",
        "#### stratify\n",
        "To ensure that the training and test sample data are proportional, we pass the label array `y` to this parameter. This means that, for example, if there are more days with positive than negative return, the training and test samples will keep the same proportion.\n",
        "\n",
        "**Note:** the chronological split two cells below reassigns `X_train`, `X_test`, `y_train` and `y_test`, so the model is actually trained on the first 80% of the rows and evaluated on the last 20%."
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split  \n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=200, stratify=y)"
      ],
      "outputs": [],
      "execution_count": 8,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:20.663Z",
          "iopub.status.busy": "2021-09-07T16:56:20.657Z",
          "iopub.status.idle": "2021-09-07T16:56:20.816Z",
          "shell.execute_reply": "2021-09-07T16:56:20.807Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "train_length = int(len(dataset)*0.80)\n",
        "X_train = X[:train_length]\n",
        "X_test = X[train_length:]\n",
        "y_train = y[:train_length]\n",
        "y_test = y[train_length:]\n",
        "\n",
        "print (X_train.shape, y_train.shape)\n",
        "print (X_test.shape, y_test.shape)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(1947, 6) (1947,)\n",
            "(487, 6) (487,)\n"
          ]
        }
      ],
      "execution_count": 9,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T16:56:20.825Z",
          "iopub.execute_input": "2021-09-07T16:56:20.831Z",
          "iopub.status.idle": "2021-09-07T16:56:20.846Z",
          "shell.execute_reply": "2021-09-07T16:56:20.906Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.tree import DecisionTreeClassifier\n",
        "clf = DecisionTreeClassifier(criterion='gini', max_depth=3, min_samples_leaf=3)\n",
        "clf.fit(X_train, y_train)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 10,
          "data": {
            "text/plain": "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3,\n            max_features=None, max_leaf_nodes=None,\n            min_impurity_decrease=0.0, min_impurity_split=None,\n            min_samples_leaf=3, min_samples_split=2,\n            min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n            splitter='best')"
          },
          "metadata": {}
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:20.864Z",
          "iopub.status.busy": "2021-09-07T16:56:20.856Z",
          "iopub.status.idle": "2021-09-07T16:56:20.877Z",
          "shell.execute_reply": "2021-09-07T16:56:20.912Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from mlxtend.evaluate import bias_variance_decomp\n",
        "\n",
        "avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(clf, X_train.values, y_train.values, X_test.values, y_test.values, loss='mse', random_seed=123)\n",
        "\n",
        "print('MSE: %.3f' % avg_expected_loss)\n",
        "print('Bias: %.3f' % avg_bias)\n",
        "print('Variance: %.3f' % avg_var)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "MSE: 1.995\n",
            "Bias: 1.100\n",
            "Variance: 0.896\n"
          ]
        }
      ],
      "execution_count": 11,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T16:56:20.887Z",
          "iopub.execute_input": "2021-09-07T16:56:20.893Z",
          "iopub.status.idle": "2021-09-07T16:56:21.635Z",
          "shell.execute_reply": "2021-09-07T16:56:21.622Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Prediction\n",
        "prediction = clf.predict(X_test)"
      ],
      "outputs": [],
      "execution_count": 12,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:21.653Z",
          "iopub.status.busy": "2021-09-07T16:56:21.647Z",
          "iopub.status.idle": "2021-09-07T16:56:21.666Z",
          "shell.execute_reply": "2021-09-07T16:56:21.985Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Evaluation\n",
        "from sklearn import metrics\n",
        "print(\"DecisionTrees's Accuracy: \", metrics.accuracy_score(y_test, prediction))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "DecisionTrees's Accuracy:  0.4989733059548255\n"
          ]
        }
      ],
      "execution_count": 13,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:21.683Z",
          "iopub.status.busy": "2021-09-07T16:56:21.676Z",
          "iopub.status.idle": "2021-09-07T16:56:21.698Z",
          "shell.execute_reply": "2021-09-07T16:56:21.999Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Accuracy Score without Sklearn\n",
        "boolian = (y_test==prediction)\n",
        "accuracy = sum(boolian)/y_test.size\n",
        "accuracy"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 14,
          "data": {
            "text/plain": "0.4989733059548255"
          },
          "metadata": {}
        }
      ],
      "execution_count": 14,
      "metadata": {
        "collapsed": false,
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:21.713Z",
          "iopub.status.busy": "2021-09-07T16:56:21.707Z",
          "iopub.status.idle": "2021-09-07T16:56:21.728Z",
          "shell.execute_reply": "2021-09-07T16:56:22.023Z"
        },
        "inputHidden": false,
        "jupyter": {
          "outputs_hidden": false
        },
        "outputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn import tree\n",
        "import graphviz\n",
        "dot_data = tree.export_graphviz(clf, out_file=None, \n",
        "                                feature_names=X.columns,  \n",
        "                                class_names=X.columns)\n",
        "dot_data = tree.export_graphviz(clf, out_file=None,filled=True,feature_names=predictors_list)\n",
        "graphviz.Source(dot_data)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 15,
          "data": {
            "text/plain": "<graphviz.files.Source at 0x2112fb26f60>",
            "image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n<!-- Generated by graphviz version 2.46.1 (0)\r\n -->\r\n<!-- Title: Tree Pages: 1 -->\r\n<svg width=\"998pt\" height=\"373pt\"\r\n viewBox=\"0.00 0.00 998.00 373.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 369)\">\r\n<title>Tree</title>\r\n<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-369 994,-369 994,4 -4,4\"/>\r\n<!-- 0 -->\r\n<g id=\"node1\" class=\"node\">\r\n<title>0</title>\r\n<polygon fill=\"#399de5\" fill-opacity=\"0.007843\" stroke=\"black\" points=\"541.5,-365 392.5,-365 392.5,-297 541.5,-297 541.5,-365\"/>\r\n<text text-anchor=\"middle\" x=\"467\" y=\"-349.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">Volume &lt;= 13930850.0</text>\r\n<text text-anchor=\"middle\" x=\"467\" y=\"-334.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.5</text>\r\n<text text-anchor=\"middle\" x=\"467\" y=\"-319.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 1947</text>\r\n<text text-anchor=\"middle\" x=\"467\" y=\"-304.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [969, 978]</text>\r\n</g>\r\n<!-- 1 -->\r\n<g id=\"node2\" class=\"node\">\r\n<title>1</title>\r\n<polygon fill=\"#e58139\" fill-opacity=\"0.274510\" stroke=\"black\" points=\"465.5,-261 316.5,-261 316.5,-193 465.5,-193 465.5,-261\"/>\r\n<text text-anchor=\"middle\" x=\"391\" y=\"-245.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">Volume &lt;= 12470150.0</text>\r\n<text text-anchor=\"middle\" x=\"391\" y=\"-230.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.487</text>\r\n<text text-anchor=\"middle\" x=\"391\" y=\"-215.8\" font-family=\"Times New 
Roman,serif\" font-size=\"14.00\">samples = 478</text>\r\n<text text-anchor=\"middle\" x=\"391\" y=\"-200.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [277, 201]</text>\r\n</g>\r\n<!-- 0&#45;&gt;1 -->\r\n<g id=\"edge1\" class=\"edge\">\r\n<title>0&#45;&gt;1</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M442.32,-296.88C435.82,-288.15 428.72,-278.62 421.93,-269.51\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"424.59,-267.23 415.81,-261.3 418.98,-271.41 424.59,-267.23\"/>\r\n<text text-anchor=\"middle\" x=\"412.2\" y=\"-282.34\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">True</text>\r\n</g>\r\n<!-- 8 -->\r\n<g id=\"node9\" class=\"node\">\r\n<title>8</title>\r\n<polygon fill=\"#399de5\" fill-opacity=\"0.109804\" stroke=\"black\" points=\"627,-261 509,-261 509,-193 627,-193 627,-261\"/>\r\n<text text-anchor=\"middle\" x=\"568\" y=\"-245.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">Return &lt;= &#45;0.001</text>\r\n<text text-anchor=\"middle\" x=\"568\" y=\"-230.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.498</text>\r\n<text text-anchor=\"middle\" x=\"568\" y=\"-215.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 1469</text>\r\n<text text-anchor=\"middle\" x=\"568\" y=\"-200.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [692, 777]</text>\r\n</g>\r\n<!-- 0&#45;&gt;8 -->\r\n<g id=\"edge8\" class=\"edge\">\r\n<title>0&#45;&gt;8</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M499.79,-296.88C508.7,-287.89 518.45,-278.04 527.72,-268.68\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"530.48,-270.87 535.03,-261.3 525.5,-265.94 530.48,-270.87\"/>\r\n<text text-anchor=\"middle\" x=\"535.15\" y=\"-282.6\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">False</text>\r\n</g>\r\n<!-- 2 -->\r\n<g id=\"node3\" class=\"node\">\r\n<title>2</title>\r\n<polygon fill=\"#e58139\" fill-opacity=\"0.160784\" stroke=\"black\" 
points=\"259.5,-157 110.5,-157 110.5,-89 259.5,-89 259.5,-157\"/>\r\n<text text-anchor=\"middle\" x=\"185\" y=\"-141.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">Volume &lt;= 12021650.0</text>\r\n<text text-anchor=\"middle\" x=\"185\" y=\"-126.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.496</text>\r\n<text text-anchor=\"middle\" x=\"185\" y=\"-111.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 386</text>\r\n<text text-anchor=\"middle\" x=\"185\" y=\"-96.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [210, 176]</text>\r\n</g>\r\n<!-- 1&#45;&gt;2 -->\r\n<g id=\"edge2\" class=\"edge\">\r\n<title>1&#45;&gt;2</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M324.12,-192.88C303.96,-182.9 281.68,-171.87 260.98,-161.62\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"262.41,-158.43 251.9,-157.12 259.31,-164.7 262.41,-158.43\"/>\r\n</g>\r\n<!-- 5 -->\r\n<g id=\"node6\" class=\"node\">\r\n<title>5</title>\r\n<polygon fill=\"#e58139\" fill-opacity=\"0.627451\" stroke=\"black\" points=\"451.5,-157 330.5,-157 330.5,-89 451.5,-89 451.5,-157\"/>\r\n<text text-anchor=\"middle\" x=\"391\" y=\"-141.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">Adj Close &lt;= 3.64</text>\r\n<text text-anchor=\"middle\" x=\"391\" y=\"-126.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.396</text>\r\n<text text-anchor=\"middle\" x=\"391\" y=\"-111.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 92</text>\r\n<text text-anchor=\"middle\" x=\"391\" y=\"-96.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [67, 25]</text>\r\n</g>\r\n<!-- 1&#45;&gt;5 -->\r\n<g id=\"edge5\" class=\"edge\">\r\n<title>1&#45;&gt;5</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M391,-192.88C391,-184.78 391,-175.98 391,-167.47\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"394.5,-167.3 391,-157.3 387.5,-167.3 
394.5,-167.3\"/>\r\n</g>\r\n<!-- 3 -->\r\n<g id=\"node4\" class=\"node\">\r\n<title>3</title>\r\n<polygon fill=\"#e58139\" fill-opacity=\"0.235294\" stroke=\"black\" points=\"118,-53 0,-53 0,0 118,0 118,-53\"/>\r\n<text text-anchor=\"middle\" x=\"59\" y=\"-37.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.491</text>\r\n<text text-anchor=\"middle\" x=\"59\" y=\"-22.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 367</text>\r\n<text text-anchor=\"middle\" x=\"59\" y=\"-7.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [208, 159]</text>\r\n</g>\r\n<!-- 2&#45;&gt;3 -->\r\n<g id=\"edge3\" class=\"edge\">\r\n<title>2&#45;&gt;3</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M140.93,-88.95C128.12,-79.34 114.17,-68.87 101.46,-59.34\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"103.42,-56.44 93.32,-53.24 99.22,-62.04 103.42,-56.44\"/>\r\n</g>\r\n<!-- 4 -->\r\n<g id=\"node5\" class=\"node\">\r\n<title>4</title>\r\n<polygon fill=\"#399de5\" fill-opacity=\"0.882353\" stroke=\"black\" points=\"234,-53 136,-53 136,0 234,0 234,-53\"/>\r\n<text text-anchor=\"middle\" x=\"185\" y=\"-37.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.188</text>\r\n<text text-anchor=\"middle\" x=\"185\" y=\"-22.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 19</text>\r\n<text text-anchor=\"middle\" x=\"185\" y=\"-7.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [2, 17]</text>\r\n</g>\r\n<!-- 2&#45;&gt;4 -->\r\n<g id=\"edge4\" class=\"edge\">\r\n<title>2&#45;&gt;4</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M185,-88.95C185,-80.72 185,-71.85 185,-63.48\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"188.5,-63.24 185,-53.24 181.5,-63.24 188.5,-63.24\"/>\r\n</g>\r\n<!-- 6 -->\r\n<g id=\"node7\" class=\"node\">\r\n<title>6</title>\r\n<polygon fill=\"#e58139\" fill-opacity=\"0.843137\" stroke=\"black\" points=\"350,-53 252,-53 252,0 
350,0 350,-53\"/>\r\n<text text-anchor=\"middle\" x=\"301\" y=\"-37.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.233</text>\r\n<text text-anchor=\"middle\" x=\"301\" y=\"-22.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 52</text>\r\n<text text-anchor=\"middle\" x=\"301\" y=\"-7.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [45, 7]</text>\r\n</g>\r\n<!-- 5&#45;&gt;6 -->\r\n<g id=\"edge6\" class=\"edge\">\r\n<title>5&#45;&gt;6</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M359.52,-88.95C350.81,-79.8 341.35,-69.87 332.63,-60.71\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"334.94,-58.06 325.51,-53.24 329.87,-62.89 334.94,-58.06\"/>\r\n</g>\r\n<!-- 7 -->\r\n<g id=\"node8\" class=\"node\">\r\n<title>7</title>\r\n<polygon fill=\"#e58139\" fill-opacity=\"0.180392\" stroke=\"black\" points=\"472,-53 368,-53 368,0 472,0 472,-53\"/>\r\n<text text-anchor=\"middle\" x=\"420\" y=\"-37.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.495</text>\r\n<text text-anchor=\"middle\" x=\"420\" y=\"-22.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 40</text>\r\n<text text-anchor=\"middle\" x=\"420\" y=\"-7.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [22, 18]</text>\r\n</g>\r\n<!-- 5&#45;&gt;7 -->\r\n<g id=\"edge7\" class=\"edge\">\r\n<title>5&#45;&gt;7</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M401.14,-88.95C403.73,-80.53 406.51,-71.45 409.13,-62.92\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"412.51,-63.82 412.1,-53.24 405.82,-61.77 412.51,-63.82\"/>\r\n</g>\r\n<!-- 9 -->\r\n<g id=\"node10\" class=\"node\">\r\n<title>9</title>\r\n<polygon fill=\"#399de5\" fill-opacity=\"0.254902\" stroke=\"black\" points=\"642.5,-157 493.5,-157 493.5,-89 642.5,-89 642.5,-157\"/>\r\n<text text-anchor=\"middle\" x=\"568\" y=\"-141.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">Volume &lt;= 
14450850.0</text>\r\n<text text-anchor=\"middle\" x=\"568\" y=\"-126.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.489</text>\r\n<text text-anchor=\"middle\" x=\"568\" y=\"-111.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 677</text>\r\n<text text-anchor=\"middle\" x=\"568\" y=\"-96.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [289, 388]</text>\r\n</g>\r\n<!-- 8&#45;&gt;9 -->\r\n<g id=\"edge9\" class=\"edge\">\r\n<title>8&#45;&gt;9</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M568,-192.88C568,-184.78 568,-175.98 568,-167.47\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"571.5,-167.3 568,-157.3 564.5,-167.3 571.5,-167.3\"/>\r\n</g>\r\n<!-- 12 -->\r\n<g id=\"node13\" class=\"node\">\r\n<title>12</title>\r\n<polygon fill=\"#e58139\" fill-opacity=\"0.035294\" stroke=\"black\" points=\"857,-157 739,-157 739,-89 857,-89 857,-157\"/>\r\n<text text-anchor=\"middle\" x=\"798\" y=\"-141.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">Return &lt;= 0.006</text>\r\n<text text-anchor=\"middle\" x=\"798\" y=\"-126.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.5</text>\r\n<text text-anchor=\"middle\" x=\"798\" y=\"-111.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 792</text>\r\n<text text-anchor=\"middle\" x=\"798\" y=\"-96.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [403, 389]</text>\r\n</g>\r\n<!-- 8&#45;&gt;12 -->\r\n<g id=\"edge12\" class=\"edge\">\r\n<title>8&#45;&gt;12</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M627.23,-199.73C658.49,-185.87 696.99,-168.8 729.52,-154.37\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"731.25,-157.43 738.97,-150.18 728.41,-151.03 731.25,-157.43\"/>\r\n</g>\r\n<!-- 10 -->\r\n<g id=\"node11\" class=\"node\">\r\n<title>10</title>\r\n<polygon fill=\"#399de5\" fill-opacity=\"0.894118\" stroke=\"black\" points=\"588,-53 490,-53 490,0 588,0 
588,-53\"/>\r\n<text text-anchor=\"middle\" x=\"539\" y=\"-37.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.172</text>\r\n<text text-anchor=\"middle\" x=\"539\" y=\"-22.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 21</text>\r\n<text text-anchor=\"middle\" x=\"539\" y=\"-7.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [2, 19]</text>\r\n</g>\r\n<!-- 9&#45;&gt;10 -->\r\n<g id=\"edge10\" class=\"edge\">\r\n<title>9&#45;&gt;10</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M557.86,-88.95C555.27,-80.53 552.49,-71.45 549.87,-62.92\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"553.18,-61.77 546.9,-53.24 546.49,-63.82 553.18,-61.77\"/>\r\n</g>\r\n<!-- 11 -->\r\n<g id=\"node12\" class=\"node\">\r\n<title>11</title>\r\n<polygon fill=\"#399de5\" fill-opacity=\"0.223529\" stroke=\"black\" points=\"724,-53 606,-53 606,0 724,0 724,-53\"/>\r\n<text text-anchor=\"middle\" x=\"665\" y=\"-37.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.492</text>\r\n<text text-anchor=\"middle\" x=\"665\" y=\"-22.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 656</text>\r\n<text text-anchor=\"middle\" x=\"665\" y=\"-7.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [287, 369]</text>\r\n</g>\r\n<!-- 9&#45;&gt;11 -->\r\n<g id=\"edge11\" class=\"edge\">\r\n<title>9&#45;&gt;11</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M601.93,-88.95C611.41,-79.71 621.71,-69.67 631.19,-60.44\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"633.86,-62.72 638.58,-53.24 628.98,-57.71 633.86,-62.72\"/>\r\n</g>\r\n<!-- 13 -->\r\n<g id=\"node14\" class=\"node\">\r\n<title>13</title>\r\n<polygon fill=\"#e58139\" fill-opacity=\"0.545098\" stroke=\"black\" points=\"853.5,-53 742.5,-53 742.5,0 853.5,0 853.5,-53\"/>\r\n<text text-anchor=\"middle\" x=\"798\" y=\"-37.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.43</text>\r\n<text 
text-anchor=\"middle\" x=\"798\" y=\"-22.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 160</text>\r\n<text text-anchor=\"middle\" x=\"798\" y=\"-7.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [110, 50]</text>\r\n</g>\r\n<!-- 12&#45;&gt;13 -->\r\n<g id=\"edge13\" class=\"edge\">\r\n<title>12&#45;&gt;13</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M798,-88.95C798,-80.72 798,-71.85 798,-63.48\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"801.5,-63.24 798,-53.24 794.5,-63.24 801.5,-63.24\"/>\r\n</g>\r\n<!-- 14 -->\r\n<g id=\"node15\" class=\"node\">\r\n<title>14</title>\r\n<polygon fill=\"#399de5\" fill-opacity=\"0.137255\" stroke=\"black\" points=\"990,-53 872,-53 872,0 990,0 990,-53\"/>\r\n<text text-anchor=\"middle\" x=\"931\" y=\"-37.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">gini = 0.497</text>\r\n<text text-anchor=\"middle\" x=\"931\" y=\"-22.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">samples = 632</text>\r\n<text text-anchor=\"middle\" x=\"931\" y=\"-7.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">value = [293, 339]</text>\r\n</g>\r\n<!-- 12&#45;&gt;14 -->\r\n<g id=\"edge14\" class=\"edge\">\r\n<title>12&#45;&gt;14</title>\r\n<path fill=\"none\" stroke=\"black\" d=\"M844.52,-88.95C858.16,-79.25 873.05,-68.68 886.56,-59.07\"/>\r\n<polygon fill=\"black\" stroke=\"black\" points=\"888.65,-61.88 894.78,-53.24 884.6,-56.18 888.65,-61.88\"/>\r\n</g>\r\n</g>\r\n</svg>\r\n"
          },
          "metadata": {}
        }
      ],
      "execution_count": 15,
      "metadata": {
        "execution": {
          "iopub.execute_input": "2021-09-07T16:56:21.746Z",
          "iopub.status.busy": "2021-09-07T16:56:21.738Z",
          "iopub.status.idle": "2021-09-07T16:56:21.765Z",
          "shell.execute_reply": "2021-09-07T16:56:22.032Z"
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def plot_curve(ticks, train_scores, test_scores):\n",
        "    train_scores_mean = -1 * np.mean(train_scores, axis=1)\n",
        "    train_scores_std = -1 * np.std(train_scores, axis=1)\n",
        "    test_scores_mean = -1 * np.mean(test_scores, axis=1)\n",
        "    test_scores_std = -1 * np.std(test_scores, axis=1)\n",
        "\n",
        "    plt.figure()\n",
        "    plt.fill_between(ticks, \n",
        "                     train_scores_mean - train_scores_std, \n",
        "                     train_scores_mean + train_scores_std, alpha=0.1, color=\"b\")\n",
        "    plt.fill_between(ticks, \n",
        "                     test_scores_mean - test_scores_std, \n",
        "                     test_scores_mean + test_scores_std, alpha=0.1, color=\"r\")\n",
        "    plt.plot(ticks, train_scores_mean, 'b-', label='Training score')\n",
        "    plt.plot(ticks, test_scores_mean, 'r-', label='Test score')\n",
        "    plt.legend(fancybox=True, facecolor='w')\n",
        "\n",
        "    return plt.gca()\n"
      ],
      "outputs": [],
      "execution_count": 16,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T16:56:21.776Z",
          "iopub.execute_input": "2021-09-07T16:56:21.782Z",
          "iopub.status.idle": "2021-09-07T16:56:21.794Z",
          "shell.execute_reply": "2021-09-07T16:56:22.043Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def plot_validation_curve(clf, X, y, param_name, param_range, scoring='roc_auc'):\n",
        "    plt.xkcd()\n",
        "    ax = plot_curve(param_range, *validation_curve(clf, X, y, cv=num_folds, \n",
        "                                                   scoring=scoring, \n",
        "                                                   param_name=param_name, \n",
        "                                                   param_range=param_range, n_jobs=-1))\n",
        "    ax.set_title('')\n",
        "    ax.set_xticklabels([])\n",
        "    ax.set_yticklabels([])\n",
        "    ax.set_xlim(2,12)\n",
        "    ax.set_ylim(-0.97, -0.83)\n",
        "    ax.set_ylabel('Error')\n",
        "    ax.set_xlabel('Model complexity')\n",
        "    ax.text(9, -0.94, 'Overfitting', fontsize=22)\n",
        "    ax.text(3, -0.94, 'Underfitting', fontsize=22)\n",
        "    ax.axvline(7, ls='--')\n",
        "    plt.tight_layout()\n",
        "    \n"
      ],
      "outputs": [],
      "execution_count": 17,
      "metadata": {
        "execution": {
          "iopub.status.busy": "2021-09-07T16:56:21.814Z",
          "iopub.execute_input": "2021-09-07T16:56:21.837Z",
          "shell.execute_reply": "2021-09-07T16:56:22.054Z",
          "iopub.status.idle": "2021-09-07T16:56:21.868Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import validation_curve\n",
        "num_folds = 7\n",
        "plot_validation_curve(clf, X_train, y_train, param_name='max_depth', param_range=range(10,16))"
      ],
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<Figure size 432x288 with 1 Axes>",
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XtcFdX6P/DP4iYIKCLgBUVN/ap4F8xUkuSYR80LqaV5Q+2Enq+3fqUnO100NYv6diyjY2qplRbH7HjJ0qy8pCcDMRUV8kCCSpqheANRuTy/PzabQDawwb1nZtvn/XrxYjOzZtazB4ZnrzVr1igRARERkdE46R0AERGRJUxQRERkSExQRERkSExQRERkSExQRERkSExQRERkSExQRERkSExQRERkSExQRERkSC5aV+jn5yfNmzfXuloiQziZlQsAuMffU+dIiH5XWAjk5wPOztrUl5R08IKI+FdVTvME1bx5cyQmJmpdLZEhjFq+HwDwryk9dY6EqKzz54ErVwAvL/vXFRioTllTjl18REQEf3/A3R3Iy9M7kt9p3oIi+iMbGdJE7xCILHJyAho3Bk6dAm7dAtzc9I6ICYpIU4+ENtU7BKIKubgAgYGmJOXsrN01qQrj0bd6oj+W7NxbAABfTwN8PHVg+fn5yMzMxI0bN/QO5a5UVARcugQoZfqqKWdnd3h5NYGzs2uNtmeCItLQX9ceBMBBEncqMzMT3t7eaN68OdSd/AelCuXnAwUFNW9FiQguXbqIS5cyUbduixrtg4MkiMjh3LhxA/Xr12dysiMXF1NyKiqq2fZKKdSrVx+FhTVv5TJBEZFDYnKyL6UAV1fT95o+eP1Of0dMUEREZJE5SYnUPEndCSYoIqJqunjxIrp06YIuXbqgYcOGCAwMLPn51q1bVu1j0qRJOHHiRKVl3nnnHaxbt84WIdeYk5MpSdW0q+9OcJAEkYbG3ddM7xDIBurXr4/Dhw8DAObPnw8vLy/Mnj27TBkRgYjAyclyO2D16tVV1jNt2rQ7D9YGnJ1NSco8HVJV781WmKCINDSkc2O9Q7jrPPkkUJwrbKZLF+DNN6u/XVpaGiIjIxEWFob4+Hhs3boVL730En788Ufk5eVh1KhRePHFFwEAYWFhiI2NRYcOHeDn54epU6di27ZtqF27NjZv3oyAgAA8//zz8PPzw5NPPomwsDCEhYVh586duHLlClavXo1evXohNzcXEyZMQFpaGoKDg5Gamor33nsPXbp0KRPbnDlz8MUXX8DFxQUDBw5ETEwMfv31V0yZMgXp6elQSmHFihXo0aMHXnvtNXz44YcAgClTpmDGjBnIyEjDsGGR6NUrDAcOxGPjxq04ejQJixcvwM2bN9GqVWusWLEKnp62m2eSXXxEGjp7OQ9nLxtoLhmyueTkZDz++OM4dOgQAgMD8eqrryIxMRFHjhzB119/jeTk5HLbXLlyBeHh4Thy5Ah69uyJVatWWdy3iCAhIQGvv/46FixYAAB4++230bBhQxw5cgRz587FoUOHym13/vx5fPnllzh+/DiSkpLw7LPPAjC10B588EEkJSXh4MGDaNeuHRISErBu3TokJCRg//79+Oc//4mkpCQAQEpKMiZOfBw//HAIrq6u+L//exXbt3+L+Pgf0bFjJ8TGvmWrwwiALSgiTf2/f5k+6vM+KNupSUvHnlq2bInu3buX/PzJJ5/g/fffR0FBAc6ePYvk5GQEBweX2cbDwwMDBw4EAISEhGDv3r0W9z18+PCSMhkZGQCAffv24ZlnngEAdO7cGe3bty+3na+vL5ycnPDEE0/goYcewuDBgwEAu3fvRlxcHADAxcUFderUwd69ezFixAjUrl0bABAZGYl9+/ahf//+aNmyJXr16o6bN4H9+79HSkoywsN7AQBu3bqFXr3CanTMKsIERURkQ6W7uFJTU/HWW28hISEBPj
4+GDdunMXZL9xKTXzn7OyMgoICi/uuVatWuTJixfA6V1dXJCYm4uuvv0ZcXByWLVuGHTt2ACg/FLyy/Xl6ekIp0zx9RUWC/v0HYPXqj6qsv6bYxUdEZCdXr16Ft7c36tSpg3PnzuGrr76yeR1hYWFYv349AODo0aMWuxCvXbuGq1evYvDgwViyZElJN2Dfvn3x7rvvAgAKCwtx9epV9OnTBxs3bkReXh5ycnKwefNm3H///WX25+QE3H9/L3z33R6cPHkSAJCbm4vU1FSbvjdNWlBKqWgA0QAQFBSkRZVERLrr1q0bgoOD0aFDB9xzzz3o3bu3zeuYMWMGJkyYgE6dOqFbt27o0KED6tatW6bMlStXMHz4cNy8eRNFRUX4xz/+AQCIjY3FE088geXLl8PFxQXLly/Hvffei8cee6ykm/Kvf/0rOnbsiLS0tDL7bNy4AVaseB9jx45Cfr5paP2CBYvRunVrm703ZU3z0JZCQ0OFDyykPyo+sNA2UlJS0K5dO73DMISCggIUFBTA3d0dqamp6N+/P1JTU+HiYv/2h4hp6HlRkalVZcl//5sCX9+yv6vAQHVQREKr2j+vQRFp6In779E7BLrL5OTk4E9/+hMKCgogIiWtIS2YZ5q4dcuUrGw9+xQTFJGG+gU30DsEusv4+Pjg4MGDutVfOkmZf7YVDpIg0tDPWTn4OStH7zCIbMpe0yExQRFp6O//Poq///uo3mEQ2Zx5OqTCQtvtkwmKiIhswsXF9GWrlhQTFBER2YyLi+k6lC2SFBMUEVE12eJxGwCwatUq/Prrr3aMVHvmmSaAO3+GFEfxERFVkzWP27DGqlWr0K1bNzRs2NDWIZZTWFgIZ2dnu9cD/J6kzMPPa4oJikhDMyJsd5c9FTPS8zYAfPDBB3jnnXeKJ0/thdjYWBQVFWHSpEk4fPgwRATR0dFo0KABDh8+jFGjRsHDwwMJCQll5uRbsmQJVq5cCVdXV3Ts2BFr167FtWvXMH36dPz4449QSmHBggWIjIzE2rVrERMTAxHB0KFDsXjxYhQUFMDPzw/Tp0/Hjh078NZbb8HFxQWzZ89GTk4OAgICsGbNGjRoYJ9bH8wj+8xP463J8HMmKCINhbX20zsEsqNjx45h48aN+P777+Hi4oLo6GjExcWhZcuWuHDhAo4eNY3gvHz5Mnx8fPD2228jNja23LObAOC1117DqVOn4ObmhsuXLwMwtdb8/f1x9OhRiAguX76MzMxMPP/880hMTETdunXRr18/bN26FQMGDMCVK1fQrVs3LFq0CDdv3kTfvn2xZcsW+Pn5Yd26dXjhhRewYsUKux0PZ2dTorp2DahTp/rbM0ERaej42SsAgPaN61ZRkqxmoOdtfPPNNzhw4ABCQ02z+OTl5aFp06b485//jBMnTmDWrFkYNGgQ+vfvX+W+2rdvj3HjxmHYsGGIjIws2f+mTZsAmGYhr1evHnbu3ImIiAj4+Zk+/IwZMwbfffcdBgwYADc3Nzz88MMATNNDHT9+HP369QNg6vJr0qSJzY/B7ZydAW9v4Pp1oPgJHlZjgiLS0ILPTTNNcy6+u5OIYPLkyVi4cGG5dUlJSdi2bRuWLl2Kzz77rMqWy1dffYU9e/Zg8+bNWLRoEY4dOwYRqdbjMTw8PErKiwg6depU4bOm7KlhQ+D0aeDmTaD4iSFW4Sg+IiIb6devH9avX48LFy4AMI32O336NLKysiAieOSRR0oeAQ8A3t7euHbtWrn9FBYWIjMzExEREXj99deRlZWF69evo3///oiNjQVgSjiXLl3Cfffdh127duHixYsoKChAXFwcwsPDy+0zODgYv/zyCxISEgCYHjB4/Phxex2KMpydgcBA08SyFTzqyiK2oIiIbKRjx46YN28e+vXrh6KiIri6uuLdd9+Fs7MzHn/88ZIWUExMDABg0qRJ+Mtf/lJukERBQQHGjBmDa9euoaioCM888wy8vb0xb948/O///i86dOgAZ2
dnLFy4EEOHDsWCBQvwwAMPQEQwZMgQPPTQQ+UeelirVi1s2LABM2fOxLVr11BQUICnn37a4hN47cHNDWjSxNSSshYft0GkIT5uwzb4uA3Hcfvv6tIlwNfXusdtsIuPiIg0U6+e9WXZxUekob8NaKN3CEQOgwmKSEMhzXz1DuGuYWlEGxnLnV5CYhcfkYYOnsrGwVPZeofh8Nzd3XHx4sU7/gdI9iMiuHjxItzd3Wu8D7agiDT02vYTADhI4k41adIEmZmZyMrK0jsUqoS7u/sd3QzMBEVEDsfV1RUtWrTQOwyyM3bxERGRITFBERGRITFBERGRIfEaFJGGXhwSrHcIRA6DCYpIQ3zMBpH12MVHpKF9qRewL/WC3mEQOQS2oIg09PbOVAB8si6RNdiCIiIiQ2KCIiIiQ2KCIiIiQ2KCIiIiQ+IgCSINLR7eUe8QiByGJglKKRUNIBoAgoKCtKiSyJBa+nvpHQKRw9Cki09EVohIqIiE+vv7a1ElkSF9k3we3ySf1zsMIofALj4iDa3cexIA0C+4gc6REBkfB0kQEZEhMUEREZEhMUEREZEhMUEREZEhcZAEkYaWjOqidwhEDoMJikhDjX089A6ByGGwi49IQ58fOYvPj5zVOwwih8AWFJGG1v5wCgAwpHNjnSMhMj62oIiIyJCYoIiIyJCYoIiIyJCYoIiIyJA4SIJIQ8vGhegdApHDYIIi0pCvp5veIRA5DHbxEWno08Qz+DTxjN5hEDkEJigiDW04mIkNBzP1DoPIITBBERGRITFBERGRITFBERGRITFBERGRIXGYOZGG1ky6V+8QiBwGExSRhjzcnPUOgchhsIuPSEMf7c/AR/szdI6CyDEwQRFpaGvSOWxNOqd3GEQOgQmKiIgMiQmKiIgMiQmKiIgMiQmKiIgMicPMiTT0ryk99Q6ByGGwBUVERIbEBEWkoRXf/YwV3/2sdxhEDkGTBKWUilZKJSqlErOysrSoksiQvk35Dd+m/KZ3GEQOQZMEJSIrRCRUREL9/f21qJKIiBwcu/iIiMiQmKCIiMiQOMycSEPurpzNnMhaTFBEGvpgMp8HRWQtdvEREZEhMUERaWjpt6lY+m2q3mEQOQQmKCIN/SftAv6TdkHvMIgcAhMUEREZEhMUEREZEhMUEREZEoeZE2moXm03vUMgchhMUEQaend8iN4hEDkMdvEREZEhMUERaShm+0+I2f6T3mEQOQR28RFp6MdTl/QOgchhsAVFRESGxARFRESGxARFRESGxGtQRBpqVNdd7xCIHAYTFJGG3hzdVe8QiBwGu/iIiMiQmKCINPTS58fx0ufH9Q6DyCGwi49IQ8lnr+odApHDqLIFpZRyVkq9rkUwREREZlUmKBEpBBCilFIaxENERATA+i6+QwA2K6U+BZBrXigi/7ZLVERE9IdnbYLyBXARQESpZQKACYqoGu7x99Q7BCKHYVWCEpFJ9g6E6I/gleGd9A6ByGFYNcxcKdVEKbVRKfWbUuq8UuozpVQTewdHRER/XNbeB7UawBYAjQEEAvi8eBkRVcOz/07Cs/9O0jsMIodgbYLyF5HVIlJQ/LUGgL8d4yK6K53MysXJrNyqCxKR1QnqglJqXPE9Uc5KqXEwDZogIiKyC2sT1GQAjwL4FcA5ACOLlxEREdlFlaP4lFLOAEaIyNCaVqKUigYQDQBBQUE13Q0REf2BWDuTxLA7qUREVohIqIiE+vvz0hX9cQU3roPgxnX0DoPIIVh7o+5/lFKxAP6FsjNJ/GiXqIjuUvOGtNc7BCKHYW2C6lX8fUGpZYKyM0sQERHZjDXXoJwALBOR9RrEQ3RXezLuEAA+WZfIGtZcgyoCMF2DWIjueueu3MC5Kzf0DoPIIVg7zPxrpdRspVRTpZSv+cuukRER0R+atdegzPc8TSu1TADcY9twiIiITKydzbyFvQMhIiIqrdIuPqXU30q9fuS2dYvtFRTR3apbs3ro1qye3mEQOQQlIhWvVO
pHEel2+2tLP1srNDRUEhMTaxQsERE5PqXUQREJrapcVYMkVAWvLf1MRERkM1UlKKngtaWfiagKUz86iKkfHdQ7DCKHUNUgic5KqaswtZY8il+j+Gd3u0ZGdBe6dP2W3iEQOYxKE5SIOGsVCBERUWnW3qhLRESkKSYoIiIyJGtnkiAiG+jdyk/vEIgcBhMUkYZm/qm13iEQOQx28RERkSExQRFpKGpVAqJWJegdBpFDYBcfkYZu5BfqHQKRw2ALioiIDIkJioiIDIkJioiIDInXoIg09Kd2AXqHQOQwmKCINBTdp6XeIRA5DHbxERGRITFBEWlo1PL9GLV8v95hEDkEJigiIjIkJigiIjIkJigiIjIkJigiIjIkDjMn0tDgTo30DoHIYWiSoJRS0QCiASAoKEiLKokMaXzP5nqHQOQwNOniE5EVIhIqIqH+/v5aVElkSHm3CpF3izOaE1mD16CINDRxdQImrubzoIiswQRFRESGxARFRESGxARFRESGxARFRESGxPugiDQ0MqSJ3iEQOQwmKCINPRLaVO8QiBwGu/iINJSdewvZubf0DoPIIbAFRaShv649CAD415SeOkdCZHxsQRERkSExQRERkSExQRERkSExQRERkSFxkASRhsbd10zvEIgcBhMUkYaGdG6sdwhEDoNdfEQaOns5D2cv5+kdBpFDYAuKSEP/71+HAfA+KCJrsAVFRESGxARlQUZGBpRSUEpVWdZcLiMjw+5xWRuTLd24cQNz585Fq1atUKtWLSil0KVLFwDA/PnzoZTC/Pnz7VK3Hu+XHEdRURE+/vhjDBs2DIGBgahVqxZ8fX0RGhqKF154Ab/99pveIVpt8+bN6N27N+rUqVPyd3/48OGS/0XNmze3S70TJ06EUgpr1qyxy/7vFBMUVer5559HTEwMcnJyMGzYMERFRWHo0KGVbrN7924opfDAAw9UWMboJwYZW2ZmJu69916MHTsWW7duRVBQEIYPH45evXohPT0dixYtQsuWLbF+/Xq9Q63SoUOHMHLkSCQkJKBnz56IiopCVFQUfH19K92uefPmlX44tuY8NDpeg6JKffrppwCAvXv3onXr1mXWTZ8+HaNHj4afn59d6k5JSbHLfsmxZWdn4/7770dGRgYeeOABrFq1Ci1atChZn5+fjzfeeAPPPfccRo8eDWdnZ4wYMULHiCu3adMmFBQU4O9//ztefvnlMuvy8/ORkpICV1dXu9T9yiuvYO7cuWjUqJFd9n+nmKCoUmfOnAGAcskJAPz8/OyWnACgbdu2dtu3Xp64/x69Q3B406ZNQ0ZGBrp3745t27bB3d29zHpXV1fMnTsXHh4eePLJJzF58mSEh4fb9W/1TlR2jrm6utr1PGjUqJFhkxMAQEQ0/QoJCRGjS09PFwBiOjyVM5dLT08vt0xEJC4uTu677z7x9PQULy8viYiIkL1791a4v6SkJImMjJR69epJ7dq1pWvXrrJy5cpy+71dTk6OxMTESGhoqHh7e4u7u7sEBwfLvHnz5Nq1a+XKz5s3TwDIvHnzJCMjQyZOnCiBgYHi7Owss2bNkmbNmpXUd/vXrl27yu3DLDw8vMLtwsPDyxxbS1+rV6+2eBxLM8eWnp4uO3bskIiICKlTp454eHhIjx49ZPPmzRUe35MnT8rYsWPF39+/5Bi9/vrrUlBQUGa/ZExpaWni5OQkACQhIaHSskVFRdKxY8cyf6M9evQQALJp06YKt3v66acFgMyePbvcuu3bt8uQIUMkICBAXF1dpWHDhjJ69GhJSkoqV9b8t96sWTPJz8+X119/XTp16iS1a9eWunXrlpw/lr6ioqLK7cNs9erVlZ5D6enpVZ6HZlFRUeXOO5Gy5/avv/4q0dHREhgYKG5ubtK8eXN55plnJC8vz+Lxu3Xrlrz66qvStm1bqVWrljRo0EDGjx8vp06dKv2ez4oV+YItKDt68cUX8fLLLyMsLAwPPfQQkpKSsHPnTuzbtw+7d+
9Gz55lhxrv2bMHAwcORF5eHtq0aYOuXbvi3LlzmDJlCpKTkyusJzMzE3/+85+RnJwMf39/9OzZE+7u7jhw4ABeeuklbNy4Ebt370a9evXKbZuamoquXbvC3d0dvXv3RkFBAXx8fDBy5EhcuHABH3zwAQAgKiqqZJuGDRtWGMuAAQPg7u6Or776Cg0aNMCAAQNK1rVt2xZeXl6IiorCvn378PPPP6N3795o1apVSZnSr6vy/vvv4+WXX0b37t0xaNAgnDhxAvHx8YiMjMT69esxcuTIMuWPHTuG8PBwZGdnIygoCBEREbh8+TKef/55JCQkWF3vnfg5KwcA0NLfS5P67jZbt25FUVER2rdvj+7du1daVimFCRMmYM6cOdiyZQvmz5+PiRMnIj4+HmvWrMGwYcPKbVNYWIh169YBMF0nLW3WrFlYunQpXFxc0L17dzRp0gRpaWmIi4vDpk2b8Nlnn2HQoEHl9ikiGDFiBLZv344+ffogODgYp0+fRpcuXSo8F8LCwip8X61atUJUVBQ2bNiA3NxcjBgxAl5ev/89eXl5VXkeWuvMmTMICQmBiKBXr164evUq9u3bh5iYGCQnJ2PLli3ljt/QoUOxfft2eHh4ICIiAt7e3ti5cydCQkIwePBgq+sGwBaUJbZqQfn6+kpiYmLJ8sLCQnniiScEgPTr16/Mfq5fvy6BgYECQJ599lkpKioqWbd7926pXbu2xZiKioqkZ8+eAkCmT58uubm5ZfY5bty4Mp/IzEp/eps4caLcvHmz0vdniaUWlIjIrl27yn1Su11Fn9ysqdvc0nFzc5Nt27aVWbdw4UIBIK1atSqzvKioSLp06SIAZPLkyXLr1q2SdT/99JM0atTI4u/S1h5993t59N3v7bb/u9348eMFgEyaNMmq8rt37xYA4uTkJPn5+XLp0iVxd3cXV1dXycrKKlf+iy++EAASGhpaZvmyZcsEgLRv315SUlLKrNu4caO4uLiIj4+PZGdnlywv/X8kKChIUlNTLcZY2blgqQVlVlWL/07Ow9L/H/7yl7+U+f+QnJwsXl5eAkD27dtXZrslS5aUxHvy5MmS5Tdu3JDRo0eXbslZ1YLiKD47eumllxASElLys5OTExYtWgTANOggPz+/ZN2GDRvwyy+/oGXLlli4cGGZ4dXh4eGYOnWqxTq2b9+O/fv347777sNbb72F2rVrl6zz8PDAu+++i4CAAKxbtw6XLl0qt72vry+WLl0KNze3O36/WpsxY0aZT4YA8Le//Q1169ZFWloaTp8+XbJ87969OHz4MOrVq4c333yzzEXnNm3a4IUXXtAsbqq5rKwsAECDBg2sKm8uV1RUhOzsbPj4+CAyMhL5+fn4+OOPy5U3jyot3XoqLCzEggULAADr168v1wKJjIzElClTcPnyZaxdu9ZiHK+88kq1egeMomnTpuX+P7Rr1w7jx48HAHz77bdlyi9duhQAsGjRojIDV2rVqoXY2Fh4enpWq34mKDuy1JwNCAhAvXr1cPPmTVy8eLFk+Z49ewCgZNTR7cx/ELf78ssvAQAjRoyAk1P5X6enpydCQ0NRUFCAAwcOlFv/4IMPwtvb27o3ZDCWjq+bmxvuucc0EOHs2bMly83Hd/DgwRbf75gxY+wUJelJRMotMyef229xuHz5MrZs2QI3Nzc89thjJcsPHz6Mc+fOoX379ggODrZYT3h4OABg//79Ftc//PDDNYhefxEREfDw8Ci33JykS59jZ86cQXp6OpydnTFq1Khy29SvXx8PPvhgtepngrKgdOvF0h+4pXWWbigNCgqyuF2dOnUAmG6CNcvMzASAMp86SqvoRr2TJ08CAObMmVNyg9/tX+YkZv70WVqzZo47u3Z1ju8vv/wCoOL3W7duXdStW9fGEZKtmUfinT9/3qry5pt1nZycSu4revDBBxEYGIhDhw7h6NGjJWXj4uJw8+ZNDB06tMw9SOZz7Pjx4xWeY48++igAy+dYQECAxX
/yjqAm51ijRo0qHBZf3f83HCRhQelustzc3DIXIEvLyckpeW2pjKUWTU1VNKNCYWEhANMnuKruNrf0x+GoJw5Qs+Nb2cwUtvx9kX2EhIRg7dq1+OGHH6wqbx780rlzZ7i4mP7dOTk5YcKECXjllVewZs0avPHGGwBQMiDo9sER5nMsMDAQ/fr1q7Q+SwMQeI7VfH9MUBb4+vrC09MTubm5SEtLK5na53apqakATN1olkbIVUdgYCAAVHhXeHp6usXlTZs2BQA88sgjmDZt2h3FcDdr3Nj0mItTp05ZXH/16lWL1+hsbUZE+XtdyHqDBw/G008/jZSUFBw4cKDSkXwigg8//BAAMGTIkDLrJk6ciFdeeQXr1q1DTEwMfv75Z/zwww9o2LBhueua5nOsUaNGnPmkEuZz7OzZs8jPz7fYiqrulHD8yGiBs7Mz+vTpAwD47LPPKiy3YcMGAKbWy51++jb3YcfFxZV8YivNPPT1dgMHDgTw+4wPRmC+oFpQUHBHZWzJ/PvcunVrmZav2SeffKJJHGGt/RDW2pg3jDqCVq1aldw+MG3atDJdTLdbunQpjh07Bm9v73If3v7nf/4HPXv2xPnz57F9+/aS1tO4cePKXQO+9957Ub9+fRw6dAhpaWk2fkc1V9U5pPU5FhQUhGbNmqGwsNDi/6Ps7Gx8/fXX1donE1QFZs+eDaUU3njjDXzxxRfl1n/++edYsmQJlFKYPXv2Hdc3cuRINGrUCGlpaZg/f36Z61v79u3DsmXLLG4XGRmJkJAQ7NmzB1OnTkV2dna5MidPnsQ777xzxzFay9waTEtLq/DkMJfRajqj8PBwdOrUCdnZ2XjqqafKxJWamloySsvejp+9guNnr2hS193qnXfeQVBQEA4cOIBBgwaV+1Sen5+PmJgYPPXUU1BK4b333kNAQEC5/UyaNAkAsGrVqpLRd7d37wGm2RxeeOEFFBYWIjIy0uI9c7m5ufjkk080nZ6rqnPImvPQ1mbMmAEAeO6558r0Vty6dQszZ860+OGwMuziq0BERAReffVVzJ07F4MHD0b79u3RoUMHiAiOHz9ecsE0JiYGffv2veP6ateujbVr1+Khhx7CokWLsGHDhpIbdb/77jvMmjULS5YsKbedk5OGApPDAAAKWElEQVQTNm3ahEGDBmH58uX4+OOP0blzZzRp0gQXLlzA6dOn8d///hcNGjTQrAuwWbNm6Nq1Kw4dOoROnTohJCQEtWrVQps2bTBnzhwAwLBhw7BgwQK8+eabOHbsGJo0aQKlFCZPnoxevXrZPCalFD788EP07dsXK1euxI4dO9CzZ09cvnwZu3btwuDBg3HgwAGcPn3arkPuF3xuuuGaz4OqOT8/P+zduxfDhg3Drl270KpVK/To0QPNmjXDtWvX8P333yM7Oxuenp5YuXJlyQCG240aNQqzZs3Cxo0bAQChoaFo3769xbKzZs3CqVOnsGTJEvTo0QOdOnVCy5YtUVRUhDNnzuCnn37C9evXsW3bNrRr185u7720hx9+GLt378bYsWPRv39/+Pj4AABiYmJQv359q85DW5s1axZ27NiBHTt2oF27doiIiICnpye+//575OXlYcKECeZu1yKrdmjNzVJ3+gUgGkAigMSgoKAKbxozovj4eBk/fry0aNFC3N3dxd3dXVq0aCHjx4+X+Ph4i9ugipt8K7vB7vDhwzJ06FDx8fERDw8P6dy5syxbtqzK/ebl5UlsbKz06dNH6tWrVzINS0hIiMyePVv+85//lClf0U221Xkvle0jPT1dHn30UWnQoIE4OztbvGHw008/lR49epTc9IcaTHVkiXmaF/OUTKWlpaXJmDFjxM/PT2rVqiVt27aVxYsXy40bN8TNzU2cnJwqnMLFFnijru0UFBTIRx99JIMHD5ZGjRqJq6ur+Pj4SLdu3eS5556Tc+fOVbmPsWPHlvydxcbGVll+z549Mnr0aGnatKm4ublJ3bp1pW3btjJq1C
hZt26d5OTklJSt7Cbb0mp6o25hYaEsXLiwZEoh8/sofV5UdR5aM9WRJebplm6fAEBE5ObNm7J48WJp06aNuLm5SUBAgIwZM0bS09Pl8ccfN8eZIVbkDiWVDKO2h9DQUElMTNS0TqKq7N27F3369EGHDh3KDD22tVHLTffJsAVFfzQFBQXo0KEDTpw4AQApImL5prJSeA2K/jBycnIs9tenpKQgOjoagOVrEERkvcOHD5eZJQcArl+/jpkzZ+LEiRPo0KEDAFy3Zl+8BkV/GL/++iuCg4PRunVrtG7dGl5eXsjIyMDBgwdRWFiIiIgIzJw5U+8wiRza9OnTcfz4cXTu3BmNGjVCVlYWjhw5ggsXLsDHxwdr1qxBaGioVftigqI/jICAADz11FPYuXMn4uPjceXKFXh6eqJHjx4YPXo0pk6darcHw5n9bUAbu+6fSG/R0dH4+OOPcfToUcTHxwMw3Uv26KOPYs6cOdV6fD2vQRERkaaUUgdFpMpmFK9BEWno4KlsHDxV/l41IiqPCYpIQ69tP4HXtp/QOwwih8AERUREhsQERUREhsQERUREhsQERUREhsT7oIg09OKQKmd3IaJiTFBEGmrfmI+VJ7IWu/iINLQv9QL2pV7QOwwih8AWFJGG3t6ZCgB8qi6RFdiCIiIiQ2KCIiIiQ2KCIiIiQ2KCIiIiQ+IgCSINLR7eUe8QiBwGExSRhlr6e+kdApHDYBcfkYa+ST6Pb5LP6x0GkUNgC4pIQyv3ngQA9AtuoHMkRMbHFhQRERkSExQRERkSExQRERkSExQRERkSB0kQaWjJqC56h0DkMJigiDTU2MdD7xCIHAa7+Ig09PmRs/j8yFm9wyByCGxBEWlo7Q+nAABDOjfWORIi42MLioiIDIkJioiIDIkJioiIDEmTBKWUilZKJSqlErOysrSokoiIHJwSEU0rDA0NlcTERE3rJDKK7NxbAABfTzedIyHSj1LqoIiEVlWOo/iINMTERGQ9XoMi0tCniWfwaeIZvcMgcghMUEQa2nAwExsOZuodBpFDYIIiIiJDYoIiIiJDYoIiIiJDYoIiIiJD4jBzIg2tmXSv3iEQOQwmKCINebg56x0CkcNgFx+Rhj7an4GP9mfoHAWRY2CCItLQ1qRz2Jp0Tu8wiBwCExQRERkSExQRERkSExQRERkSExQRERmS5s+DUkpdA3BC00odnx+AC3oH4YB43KqPx6z6eMyqr42IeFdVSI/7oE5Y86Aq+p1SKpHHrPp43KqPx6z6eMyqTyll1VNr2cVHRESGxARFRESGpEeCWqFDnY6Ox6xmeNyqj8es+njMqs+qY6b5IAkiIiJrsIuPiIgMiQmKiIgMiQmKiIgMiQmKiIgMiQmKiIgMiQmKiIgMiQmK7gpKKVFKfVTqZxelVJZSams195OhlPK70zL2ppSar5SaXcNtFyil+hW/flIpVdu20RHZBhMU3S1yAXRQSnkU//wggF90jMewRORFEfmm+McnATBBkSExQdHdZBuAh4pfPwbgE/MKpZSvUmqTUipJKfWDUqpT8fL6SqkdSqlDSqnlAFSpbcYppRKUUoeVUsuVUs6VVa6UGqCU+lEpdUQp9W0V9c5XSn1QXHeGUmq4Uuo1pdRRpdR2pZRrcbkMpVRMcRwJSqlWFuptWbzNQaXUXqVU2+Llm5VSE4pfT1FKrSt+vUYpNVIpNRNAYwC7lFK7lFKPK6WWlNrvE0qpf1T3l0BkK0xQdDeJAzBaKeUOoBOA+FLrXgJwSEQ6Afg7gA+Ll88DsE9EugLYAiAIAJRS7QCMAtBbRLoAKAQwtqKKlVL+AFYCGCEinQE8UkW9ANASpoQ6DMBaALtEpCOAPPyeaAHgqojcCyAWwJsWql8BYIaIhACYDeCfxcujAbyolLofwNMAZpTeSESWAjgLoK+I9IXp+A01J0cAkwCsrug9E9mbHo/bILILEUlSSjWHqfX05W2rwwCMKC63s7jlVBdAHwDDi5d/oZ
S6VFz+TwBCABxQSgGAB4DfKqn+PgDfiUh68b6yq6gXALaJSL5S6igAZwDbi5cfBdC81L4/KfV9SanlUEp5AegF4NPiOAGgVnF955VSLwLYBeDhUjFZJCK5SqmdAAYrpVIAuIrI0cq2IbInJii622wB8H8AHgBQv9RyZaGs3Pa9NAXgAxF51sp6VSX7qajemwAgIkVKqXz5fWLMIpQ9N6WC14CpF+RycSvPko4ALsLUlWeN92Bq6f0Etp5IZ+zio7vNKgALLHzy/w7FXXRKqQcAXBCRq7ctHwigXnH5bwGMVEoFFK/zVUo1q6Te/QDClVItzOWrqLc6RpX6vr/0iuJ9pSulHimuQymlOhe/vhfAQABdAcw2x3abawC8S+0vHkBTAGNQ6hoekR7YgqK7iohkAnjLwqr5AFYrpZIAXAcQVbz8JQCfKKV+BLAHwOni/SQrpZ4HsEMp5QQgH8A0AKcqqDdLKRUN4N/F5X+DaSRhRfVWRy2lVDxMHygfs7B+LIBlxfG6AohTSv0E0zWxSSJyVin1NIBVSqmI27ZdAWCbUupc8XUoAFgPoIuIXAKRjvi4DSIDU0plAAgVkQsa1rkVwBIR+VarOoksYRcfEQEAlFI+Sqn/AshjciIjYAuKiIgMiS0oIiIyJCYoIiIyJCYoIiIyJCYoIiIyJCYoIiIypP8P6pQ9tQhU2V0AAAAASUVORK5CYII=\n"
          },
          "metadata": {
            "needs_background": "light"
          }
        }
      ],
      "execution_count": 18,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T16:56:21.894Z",
          "iopub.execute_input": "2021-09-07T16:56:21.913Z",
          "iopub.status.idle": "2021-09-07T16:56:30.482Z",
          "shell.execute_reply": "2021-09-07T16:56:30.496Z"
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "#### Compare the scores of testing on training and testing data. If the scores are close to equal, is likely underfitting; however, if they are far apart, is likely overfitting. "
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn import cross_validation\n",
        "\n",
        "for i in range(5):\n",
        "\n",
        "    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)\n",
        "    clf = DecisionTreeClassifier(criterion='gini', max_depth=3, min_samples_leaf=3).fit(X_train, y_train)\n",
        "\n",
        "    print(\"Test score\", clf.score(X_test, y_test))   \n",
        "    print(\"Train score\", clf.score(X_train, y_train))   "
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Test score 0.4804928131416838\n",
            "Train score 0.5608628659476117\n",
            "Test score 0.5420944558521561\n",
            "Train score 0.5387776065742167\n",
            "Test score 0.4804928131416838\n",
            "Train score 0.5557267591165896\n",
            "Test score 0.5215605749486653\n",
            "Train score 0.5536723163841808\n",
            "Test score 0.5071868583162218\n",
            "Train score 0.5475089881869543\n"
          ]
        }
      ],
      "execution_count": 21,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T17:12:07.895Z",
          "iopub.execute_input": "2021-09-07T17:12:07.902Z",
          "shell.execute_reply": "2021-09-07T17:12:07.919Z",
          "iopub.status.idle": "2021-09-07T17:12:07.927Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import classification_report\n",
        "\n",
        "print(classification_report(y_test, prediction, target_names=y.name))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "             precision    recall  f1-score   support\n",
            "\n",
            "          B       0.44      0.11      0.17       249\n",
            "          u       0.48      0.86      0.61       238\n",
            "\n",
            "avg / total       0.46      0.47      0.39       487\n",
            "\n"
          ]
        }
      ],
      "execution_count": 36,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T17:37:33.034Z",
          "iopub.execute_input": "2021-09-07T17:37:33.043Z",
          "iopub.status.idle": "2021-09-07T17:37:33.056Z",
          "shell.execute_reply": "2021-09-07T17:37:33.063Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import confusion_matrix\n",
        "\n",
        "# Calculate the confusion matrix\n",
        "conf_matrix = confusion_matrix(y_true=y_test, y_pred=prediction)\n",
        "\n",
        "fig, ax = plt.subplots(figsize=(5, 5))\n",
        "ax.matshow(conf_matrix, cmap=plt.cm.Oranges, alpha=0.3)\n",
        "for i in range(conf_matrix.shape[0]):\n",
        "    for j in range(conf_matrix.shape[1]):\n",
        "        ax.text(x=j, y=i,s=conf_matrix[i, j], va='center', ha='center', size='xx-large')\n",
        " \n",
        "plt.xlabel('Predictions', fontsize=18)\n",
        "plt.ylabel('Actuals', fontsize=18)\n",
        "plt.title('Confusion Matrix', fontsize=18)\n",
        "plt.show()"
      ],
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "<Figure size 360x360 with 1 Axes>",
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUUAAAFVCAYAAAB4ov3GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XeYFFXaxuHfSxqCgiCIWVREMYvIGr5VVJQ1oGJAUBFWjOiuuqbdNRDMWVQwK0ERs4viukgwEVxQQVEER0DFQGbIM4T3+6MK9tDTM0xDT3cPPPd19dXTp05Vvd3MPJyqU91t7o6IiEQqZbsAEZFcolAUEQkoFEVEAgpFEZGAQlFEJKBQFBEJKBSlzMzsYDMbbmYLzMzNrHs57adzvP2W5bH9zUn8OvXNdh2bE4ViBWBmNc3sGjP7xMzmm9lKM5tlZu/FAVIlAzVUAd4A9gJuBToCb5b3frPFzBrFgeNm9m4Jfaqa2Zy4z4xN2NcZ5fUfjKTOdPF2bjOzxsAQoAkwDBgKzAW2A1rFt/vd/cZyrqMJMAW4zt0fKud9VQaqAkXuvqY891VKDY2A6cCKuJZd3P23hD5nAa/HfWa5e6ON3FdfoJO720asWx1Y7e4rN2bfUly5jzBk45lZDeBdYA/gLHdPHJnda2aHAYdloJzt4/v55b0jd18NrC7v/ZTRO0BbopHxfQnLLgK+AioDW2WqoPj3YqW7r3L3FZna75ZCh8+57WJgb+DBJIEIgLuPc/c+YVt8ODbKzJbEt1FmdnriumY2w8w+NLN9zGyImS02swIze93Mtg/6fQh8FD98ITisbFTa+b942zMS2o40s3+b2e9mtsLMfolPAxwe9Em6TTOrb2a9zexnMyuK73ub2bYJ/dauf5yZXW9mP5hZoZlNNbNOyV7HUswG3gP+nLCPHYDWwAvJVjKzFmbWN97nsvi1HWVmbRNfI6BT/LMHt85xW9/4cQMze97MZgFLgZ2DdfoG27sybrs1YT87xof6k82sZoqvwRZFI8XcdnZ8/3RZVzCzrkBv4DvgDsCBzsDbZnaZuyduayfgQ+At4AbgIOAyoDZwYtznTmAU8M+4lk/i9jmpPBkz2xv4APgd6AXMIhqBHhXvd2wp69YBRgONgeeBL4BDgCuA48yshbsvTljtLqAG8BRQGPfta2b57j4qhdKfJ3r9jnD3MXFbJ6LR7ItE/3klagvsA7wK/AhsG6/zppmd7+4D4353Eg1O/kg0Gl1rdML21r5utwO1gCXJCnX33mZ2HNDNzEa6+6dmVimuc2uglbsvK/tT3wK5u245egPmAYtS6F+X6I8lH6gdtNcGfgAWA9sE7TOIQrNdwnZ6x+37BG0t47bOCX07x+0tk9TzITAjePzXuG+LDTyPYtskCg8Huib0vTJuvz3J+l8C1YL2nYjC8eUyvJaN4m08TjR4+B14Olj+HfB6/POk8HnGbbWSbLMm0XnZbxPa+0Z/iknr6BvX8WIJyx3om+T3YAbwU/zzrXG/q7L9O10Rbjp8zm21gUUp9D+BaBTxqLuvWy/++TGi816tEtb51d1fTWgbEd83Tq3cDSqI70+PJwhS0ZZoZJo40n2KaOKpbbE1oI+7F6194O6/AFOJZtDLzN1XAQOAc+MrAY4iOq3xfCnrLF37c7zOtkShOAJoama1U6kBeCCFehcA5wE7AP8GugGD3f3xFPe5RVIo5rZFRIc8ZbV7fP9NkmWT4vs9EtqnJek7L77fNsmyTTGIaAb9n8B8MxthZjeZ2W5lWHd3YEocUOvEj6dQ/HlByc9tY57X80T/SZ1JNMHyK/Cfkjqb2XZm9nRwDnAuUahfHnfZJsX9T02ls7uPBu4F/hDv96IU97fFUijmtklAbTNL9gefTMqXdFD6LG9ZtlfaNV3rnbN290J3P4HoD/XueN89ge8SJyDSpKTnlvLr5O6Tgc+IDtfbAf
09miUvvnEzI7p0qhPQHzgX+BPRSH7tucSU/vY8xfOAZlaNaCIIoB6wayrrb8kUirntjfg+2Yn8ZH6I7/dLsmzf+D7Z6GlTrL1Ep16SZbsnacPd/+vut8cB2ZhoJHXHBvYzDdg78UL1+HET0v+8knkeOJzoNETSWefYgUQTR/e4+w3u/qq7/8fdhxFdvpOoPC4WvhtoDtxIdMQxyMxqlcN+NjsKxdz2LNGh4fXJLqkBMLND4xlniGYolwJ/MbOtgz5bA38hmoT5IM01rj2sW+9cpZl1AHZMaKufZP2ZRId3yUI19DbQgOL/QVwSt79Vxno3xSCgB3C1u5d2OLt2BLneiNTM9if5uc8l8fINvQZlYmYnAdcC/dz9fqKJpyZEk0ayAbokJ4e5+zIzO5XoHS1vm9lQolCbRxQExxIdIt0X919oZjcSzR5/Fly/1ploRHaZuxeQRu4+xcyGAZfFh40TgIOJ/vjzid4NstYtZnYi0QXp04lCow3RpSuJF0Ynug84B+htZs2IZpYPAboQ/cexofU3WTxh1b0MXScTnde9Mb4mcApRKF1GdEqkWUL/scBVQB8zGwKsBD5z9+mp1hhfP9kP+D7eJu4+xMx6AVeb2X/cfVCq292SKBRznLvnm9khRH9QZwE3Ex2+zQfGE523Ghj072NmvxFdc9gtbp4ItHX3t8upzI5Es9vnxz9/QhTYTxBd2rLW20Qzou2AhsByoj/eS4DnStuBuxfEs749gNOILqaeBTwJdPPi1yhmjbuvNrNTiGaMOxFdETAp/vkgiofiy0QB354o+CsRPb+UQjG+HnEA0YRQa3cPr2W8ETgaeMrMNipwtxR677OISEDnFEVEAgpFEZGAQrECMrM/mdkUM8s3s79nux7JHfGHRsw2s0kb7i3JKBQrGIs+a7A3cBLRtYcdzGzf0teSLUhfogvFZSMpFCueFkC+u0+L39c7CEh6DaNsedz9YzLwmZebM4VixbMT8HPweGbcJiJpoFCseJK9b1fXVYmkiUKx4pkJ7BI83pnoE1tEJA0UihXPOGAvM9s9/iSU9sDgLNckstlQKFYw8ecHXkX0WX6TgVfdPdnnJ8oWyMxeBsYQfaLQTDPrku2aKhq9zU9EJKCRoohIQKEoIhJQKIqIBBSKIiIBhWIFZmaXZrsGyU363dh4CsWKTb/4UhL9bmwkhaKISKBCXadYv359b9SoLN+bvmWYM2cuDRok+4K8LdOKxYuyXULOWFBQQN06dbJdRs74bmr+olXuZXpBKtQXVzVqtBvjPxuT7TIkR035ON3f3iqbi4NbnTq7rH11+CwiElAoiogEFIoiIgGFoohIQKEoIhJQKIqIBBSKIiIBhaKISEChKCISUCiKiAQUiiIiAYWiiEhAoSgiElAoiogEFIoiIgGFoohIQKEoIhJQKIqIBBSKIiIBhaKISEChKCISUCiKiAQUiiIiAYWiiEhAoSgiElAoiogEFIoiIgGFoohIQKEoIhJQKIqIBBSKIiIBhaKISEChKCISUCiKiAQUiiIiAYWiiEhAoSgiElAoiogEFIoiIgGFoohIQKEoIhJQKIqIBBSKIiIBhaKISEChKCISUCiKiAQUiiIiAYWiiEhAoSgiElAoiogEFIoiIgGFoohIQKEoIhJQKIqIBBSKIiIBhaKISEChKCISUCiKiAQUiiIigSrZLkCK+/zzLxjw0kuMGPkh06fPoFatWuy3b1P+cdONtGp1/Lp+nS+6mH79B5S4nTt69uDmf/49EyVLOZg0NZ/BQ0cwdsJXzPxtFjWr59G40a5cel47jjz04JT7pdp3S2Xunu0ayqx580N9/Gdjsl1GuWt/3gUMHzGSs848g2aHHMKSJUt4oV9/Jk36hj6PP8oVl18GwJgxY/lh2rRi6/d67HHGj/+cz/87lmbNDsl0+Vkz5eMPsl1CWv3t9nsZ88VETjz6SPbdqzHLli/nzfeH8f30H+l2dVc6nH5ySv1S7bs5ObjVqfnLV/
teZemrUMxBo0ePoVmzQ6hevfq6tuXLl3PwoYcxZ85cZv82kypVkg/yly1bxvY77UqjRrvx1ZefZ6rknLC5heIXkyazX5M9yatWbV3bisJCzrjkrywoKGDUmy9RpXLlMvdLZZubm1RCUecUc9CRRx6xXiAC1KhRg1NPPpkFCxbw+++/l7juW2//i8WLF9Op4wXlXaaUs2b7N10vvACq5+XR8ojDKFi8hLnzF6TUL9W+W6qshqKZ/cnMpphZvpnp5NcG/Prbb1SpUoW6deuW2Kdf/wFUqVKFC84/L4OVSSbNnjufKpUrU3urrdLSL9W+m7ushaKZVQZ6AycB+wIdzGzfbNWT6779djJvvvU2p7U5lVq1aiXt88svvzB8xEhan3gCDRs2zHCFkgn5M37ig09Hc+yRf6Bmjeqb3C/VvluCbI4UWwD57j7N3YuAQcDpWawnZy1atIhz2negZs2aPPzg/SX2G/DiQNasWUPnThdmsDrJlCVLl3FNj3uokZfHP7pevMn9Uu27pcjmJTk7AT8Hj2cCf0jsZGaXApcC7LrrrpmpLIcsX76cNqefybRp03l/yDulvgb9X3yRunXr0ubUUzJYoWTCisJCLr+5Jz//9jvP3NuDHRtut0n9Uu27JcnmSNGStBWbCnf3p929ubs3b9CgfgbKyh1FRUW0Pescxowdy2uDBnLMMUeX2HfcuPFMnvwdHdq3Iy8vL4NVSnkrWrmSq267k4nffscj3f5Oi4MO2KR+qfbd0mRzpDgT2CV4vDPwa5ZqyTmrVq2iXfvz+GDYcF4a0I9TNzD66zcguoi7U8eOmShPMmTV6tVc2/NeRn8+gfv/eR3HHtFik/ql2ndLlM2R4jhgLzPb3cyqAe2BwVmsJ2esWbOGCztfxL8Gv8OTfR6n/bntSu1fVFTEoFdeo2nTfWjR4rAMVSnlbc2aNdx090MMHzWW7td05ZTjjtmkfqn23VJlbaTo7qvM7CrgP0Bl4Hl3/yZb9eSS62+4iZcHvcIxRx9NjRo1ePGlgestP6HV8evNLr875D3mzZvHDdf9LdOlSjm698nnGDLiIw47aH+q5+Ux+IOR6y0/8tCDqV+vbpn7pbLNLVlW3/vs7u8B72Wzhlz0xZcTAPjo44/56OOPiy0fOWzoeqHYr/8AKlWqRMcLdG3i5uTb738AYNzESYybOKnY8n4P3UX9enXL3C+VbW7J9DY/2Wxsbm/zk/TR2/xERDaSQlFEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCSgURUQCCkURkYBCUUQkoFAUEQkoFEVEAgpFEZGAQlFEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCSgURUQCCkURkYBCUUQkoFAUEQkoFEVEAgpFEZGAQlFEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCWxyKJrZoWZ2gplVT0dBIiLZVOZQNLPrzeydhLaBwH+B94GvzaxhmusTEcmoVEaK7YGf1j4ws+PitkHAzcAOwI1prU5EJMOqpNC3EdAveHwG8Btwgbu7mdUHTgOuS195IiKZlcpIsRawLHh8HDDM3T1+/C2wU7oKExHJhlRC8RfgQAAz2w3YF/goWF4XKExfaSIimZfK4fM7QFczqwz8gSgAhwTL9wdmpK80EZHMSyUUexKNFLsSBeI17j4LwMxqAG2B59JeoYhIBpU5FN19AXC8mdUGlrv7yoQuxwA/p7M4EZFMS2WkCIC7L0rSthyYmJaKRESyqMRQNLNdN2aD7v7ThnuJiOSm0kaKMwAvZXlJKm9cKSIi2VdaKPZk40JRRKTCKjEU3b17BusQEckJ+ugwEZFAyrPPAGa2FbANSUJVEy0iUpGlFIpm1h64BWhaSjdNtIhIhZXK5ymeAQwkCtKnAANeBl4DVgJfEE3OiIhUWKmMFK8HJgOHAlsBlwPPu/sIM9sfGAVMSH+JIiKZk8pEy4FAP3dfAayJ2yoDuPsk4GngH+ktT0Qks1IJxcrAvP
jn5fF9nWD5FKJPyhERqbBSCcWZwG6w7r3Os4HmwfK9gaXpK01EJPNSOac4GmgF3BY/HgxcbWbLiML1SqLPXBQRqbBSCcU+QFszqxGPFG8GWgDd4+XfEE3GiIhUWKl8nuI4YFzweA5wsJkdCKwGJrv7mpLWFxGpCDbqHS0hd/8qHYWIiOQCvfdZRCRQ5pGima1hwx8l5u6+yaNPEZFsSSXA+lM8FKsAexJ9u99X6B0tIlLBpTLR0rmkZWZ2JNElOlekoSYRkaxJy6Guu482sxeA+4Cj07HNEnaEr1ldbpuXiq1GZX1QvCRXyVLom8b9fg80S+P2REQyLp2h2JL/vSdaRKRCSmX2+cISFtUjevvfScCz6ShKRCRbUjmn2Jdo9jnZ0fkq4Dngb2moSUQka1IJxWOTtDkwH5ju7vqEHBGp8FK5JOej8ixERCQXpPIdLdPM7LRSlp9qZtPSU5aISHakMvvciOi7WUpSi/hDaEVEKqp0XpLTEFiWxu2JiGRcqecUzexoousP1zrTzBon6VoPaI/e+ywiFdyGJlqOBbrFPztwZnxLJh+4Nk11iYhkxYZC8RGi6xMNmAZcA/wroY8DS9x9ftqrExHJsFJD0d0LgAIAMzsW+Db+GgIRkc1SKhMtXwM7lLTQzA40s7qbXpKISPakEor3ER1Kl+QF4O5NqkZEJMtSCcVjKf17nQcTfTCEiEiFlUoo7gj8VMrymXEfEZEKK5VQXErp71jZDSjctHJERLIrlVD8DOhkZlsnLojbLgT+m67CRESyIZVQfADYGRhtZmebWWMz29PMzgZGx8vuL48iRUQyJZWPDhtpZl2BXsArCYtXAle5+7B0FicikmkpfZufuz9lZu8C7YDGRO90mQK87u6/mFmeu+u8oohUWCl/xam7/wI8HLaZ2aFmdjNwLrBtmmoTEcm4jf7eZzOrB1wAdAH2Jxo1Tk1TXSIiWZHy5ymaWWszewVYO2KsBvQADnD3fdJcn4hIRpVppGhmuwN/BjoRzTLPAV4HzgNudvc3y61CEZEMKnWkaGbnmdlw4HvgRmA80BbYiWh0mOzrTkVEKqwNjRRf5H+fozgw/MxEM/PyLExEJBs2dE6xiOgLq04HTjKzGuVekYhIFm0oFLcnGiVuCwwAZpnZc/F3t+jQWUQ2O6WGorsvdPfH3b0Z0JwoGM8ARgKfEn0VQZ1yr1JEJEPKfEmOu3/h7lcSfTxYR+CbeNGzZjbBzG4xs/3Ko0gRkUxJ+TpFdy9094HufjywJ3AnUBfoCUxMc30iIhmVciiG3H2Gu99GNBlzMqDrFUWkQtvot/mF3N2B9+ObiEiFtUkjRRGRzY1CUUQkoFAUEQkoFEVEAgpFEZGAQlFEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCSgURUQCCkURkYBCUUQkoFAUEQkoFEVEAgpFEZGAQlFEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCSgURUQCCkURkYBCMQdNnvwdHc6/kCb7HsDWdRtQZ9uGNDvscB59vA9FRUUlrjdi5IdUqlaTStVqkp//QwYrlvLw9ZR8ejz2DK3//Bf2/VM7mrftSIdrb+bT8ROK9V21ajW9+g3iqHO70OSEMzmu4+X0ffNd3L3UfYz6YiK7HdOG3Y5pw4yZv5bXU6lQqmS7ACnu55kzmb9gAeeeczY777wTq1evZvTosVx73Q2MHPkhb73xarF1ioqKuOqv11KrVi2WLl2ahaol3Z4a9CajPp/IScccSae2p7B0+Qpe+/cwzr/uVu649go6nnHyur43P9SHQUOG0uHU1hzUdC8+Gfcl3Xo9RcGixVzduUPS7RetXMmtDz9JzRrVWbZ8RaaeVs5TKOagE09oxYkntFqvrevll7FN3W3o88RTTJkylb33brLe8gcf7sX8BQu4uMuf6fXo45ksV8pJ57Pa8MDfr6F6XrV1bR3POImTulzN/c8OoMOpralSpTLf5k9n0JChdDnndG676m
IAOpzamituu4feL71G+zatabhtvWLbf+aVtylYvIQOp57Ic68NztjzynU6fK5Adtt1VwAWFixcr/3HH3/izrvv5e47e1Kndu1slCbloPn+TdcLRIDqeXkcd8RhFCxewpz5CwB4d8QnAFx09mnr9b3o7DYUFq1k6Cdji2175u+zeWzAK9x0aSe2rlWrnJ5BxZS1UDSz581stplNylYNuW7ZsmXMnTuXGTN+ZNArr3H/gw+zww7bc+ABB6zX7+prr+PAA/an84Uds1SpZNLsufOpUrkydbbeCoCvpnxPg3rbsPP2263X76CmTahUqRJfT80vto3ujz7NPns04pyTjs9IzRVJNg+f+wKPA/2zWENOu++Bh+h5x13rHrc4rDlP9n6MGjVqrGt7d8h7vPvev/ls9MeYWTbKlAyaOuMn3v9kNK2OakHNGtUBmDVvPg3rb1usb7WqValbe2t+nzNvvfbho//L8DHjGPzkg/qdSSJroejuH5tZo2ztvyK48ILz+b+jjmTevPmM/OgjvvrqaxYWFKxbvnz5cq7+2/V0uagzhzZrlsVKJRMWL11G1273UD0vb925Q4AVhUVsVbNm0nXyqlWlMLhiYUVhId0efZr2p5zAAXs3LveaK6Kcn2gxs0uBSwF23XWXLFeTWXvssTt77LE7AOe2O5uHez1G65PbMGH8ZzRtug933n0vCxcWcGfP7tktVMrdisJCuvyjJz/9Oov+93dnp4b/O1SunleNopUrk65XWLSSvGr/Oy/52IBXWbRkKTdcrFMtJcn5iRZ3f9rdm7t78wb162e7nKw6r307Vq5cyYsDX+bXX3/lwYd7cUmXi1i4sID8/B/Iz/+B+Quik+8//fwz06fPyG7BkhZFK1dy6S138cU3U3iix00cfvD655QbbluPWXPnJ11vwaLF6w6tZ82dxzOvvMV5bVqzaMlSZsz8lRkzf2XhosUA/DJ7Dj/99nv5P6Ecl/MjRfmfFSsKAViwcCGzZ8+hsLCQ+x54kPseeLBY31atT6ZOnTosmPNbpsuUNFq1ajVXdr+XT8ZPoNct13H8kS2K9Tlg78Z8Mn4Cv8yavd4IcuJ337NmzRoOaLInAHMXFFBYtJInBr7BEwPfKLad8669hdpb1eLrIYPK7wlVAArFHDR79my22267Yu1PPv0sEE247L57I159+aVifV57/Q1ee+NNHn3kQXbdZcs63bC5WbNmDdfe9RBDP/2Me66/itOOPzppv1Na/h99XnqdF954h1u6dlnX3veNd6hWtQqt/3g4ALvs0JA+Pf5ebP0hIz9hyIej6HH1ZezUsEH5PJkKJGuhaGYvAy2B+mY2E+jm7s9lq55ccnnXvzBv/nyOOfqP7LLLzixcWMAHw4YzbPgIjjzicM7v0J6qVaty9llti6076ZtvAPjTiSfSuPGemS5d0uiOPs8zePjHHH7w/uTlVePNoSPXW/7H5gfToF5d9m+yJ+1OPoFnX/0XS5ct56CmTfhk3Je8O/JTruncYd3hc+2tanFKy6OK7Wfq9B+BUbRs0YxGO++YiaeW07I5+5z8vUfCue3Ood+AATzftx9z5swlLy+PvZvsxT133cFfr+pK1apVs12iZMA330fvXx87YRJjJxS/nHfQI3fRoF5dAO66ris7NWzAa/8exuvvD2fn7RvS/S+X0PmsNhmteXNgG3rDeC5pfmgzHzd2VLbLkBz185jh2S5BclTT49rkL13le5Wlb87PPouIZJJCUUQkoFAUEQkoFEVEAgpFEZGAQlFEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCSgURUQCCkURkYBCUUQkoFAUEQkoFEVEAgpFEZGAQlFEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCSgURUQCCkURkYBCUUQkoFAUEQkoFEVEAgpFEZGAQlFEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCSgURUQCCkURkYBCUUQkoFAUEQkoFEVEAgpFEZGAQl
FEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCSgURUQCCkURkYBCUUQkoFAUEQkoFEVEAgpFEZGAQlFEJKBQFBEJKBRFRAIKRRGRgEJRRCSgUBQRCSgURUQCCkURkYC5e7ZrKDMzmwP8mO06ckh9YG62i5CcpN+N9e3m7g3K0rFChaKsz8zGu3vzbNchuUe/GxtPh88iIgGFoohIQKFYsT2d7QIkZ+l3YyMpFCswd98sf/HNrLOZuZm1LK2tvPa1OdhcfzcyQaEoAJhZyzgcwtsSM/vczK42s8rZrnFjxc+tu5ltk+1aJPcpFCXRy0BH4ELgdqAm8AjwRDaLAgYANYCPN2LdlkA3IFkobsp2ZTNUJdsFSM75wt1fXPvAzJ4AJgMXm9mt7j4rcQUzqwpUdvcV5VWUu68GVleU7UrFpZGilMrdFwFjAAP2iA9D3cz2M7OHzGwmsAI4fO06ZtbKzIaa2UIzW2FmX5nZ5cm2b2YXm9l3ZlZoZvlmdnW8r8R+Sc/9mVk1M7vRzCaY2TIzKzCz8WZ2Vby8L9EoEWB6cGqg+wa2W9/MepvZz2ZWFN/3NrNtS6jrODO73sx+iJ/LVDPrlOR5nGJmH5nZXDNbbmY/mdmbZtaktH8HyRyNFKVUZmZA4/hh+A6Jl4DlwIOAA7/F/S8FngTGAncCS4ETgCfMbE93vyHY9jXAw8BE4J9Eh+o3ALPLWFs14D9Eh8dDgReJAvoA4EzgceApoDbQFrg2eA5flbLdOsDo+Hk/D3wBHAJcARxnZi3cfXHCancRHYY/BRTGffuaWb67j4q3ewwwGPgauBtYCOwItIr3NbUsz1vKmbvrphtEweLAbURvEWsAHAg8E7ePift1jx9/CFRJ2MYORKE0MMn2exEdpu4ZP96GKDC/BWoG/XYGlsT7aBm0d07SdmPcdleS/VUKfl5bc6Mk/ZJt9864rWtC3yvj9tuTrP8lUC1o34koHF8O2h6K+26X7X9v3Uq+6fBZEvUA5hCN1iYCFxGNbs5I6PeIu69KaDsbyAOeiw8/192Ad4hO1xwf9z2RaGTY292Xrd2Au88kGoWWxfnAAqBn4gJ3X1PGbSTTlug1SLys5SmikWbbJOv0cfeiYP+/EI389gr6FMT3Z5mZjtJylP45Tq80AAACbElEQVRhJNHTwGtEI5qlwFR3n5+kX7JDvabx/bBStt8wvt8jvv8uSZ9vy1AnRIEzwdM/wbM7MD4x9N19lZlNAZolWWdakrZ5wG7B48eB04E+wL1m9inwPtFock5aKpdNplCURN+7e2mhttayJG1rJ0guJD7HmMS0hL7JPpGk2ERLKXLlE01KmsFe91zcfZ6ZHQb8keg869FE51R7mNnJ7j6m/MuUDVEoSjp9H9/PLUOw/hDfNwVGJCxrStlMBZqaWZ67F5bSL9XgnAbsbWZVwtFifMjbhOSjwjLx6BKgD+MbZnYg8DlwC3DKxm5X0kfnFCWdXiWaXOhhZjUSF5pZHTPLix9+QDR7faWZ1Qz67AycV8b9vQTUJQqUxH2Fo80l8X29Mm73baKJposT2i+J298q43YSa6qfpPk7otehrLVJOdNIUdLG3Wea2RXAs8BkMxtA9KHADYgukzkD2BeY4e4LzOxW4AFgtJn1J5p4uZxoxHlIGXbZC2gD3BIflg4lmv3eD9ib6FIXiC4Pgug83ktxn0nuPqmE7d4HnAP0NrNmRDPLhwBdgCnx8o3xTBz6Q4lelxrAucDWQP+N3KakmUJR0srdXzCzqcD1wGVEl97MJQqTW4Hfg74PmtkS4G9E1+39TBSSBUTXB25oX0VmdiJwHdHo8i6iwPseeCHoN8rMbiIK3GeIfu97AElD0d0LzOyouM9pwJ+BWUTXX3bz4tcoltUAokt4OhH9R7GIaFLpbHd/YyO3KWmmT94WEQnonKKISEChKCISUCiKiAQUiiIiAYWiiEhAoSgiElAoiogEFIoiIgGFoohI4P8BOFVKmhiyR+YAAAAASU
VORK5CYII=\n"
          },
          "metadata": {
            "needs_background": "light"
          }
        }
      ],
      "execution_count": 38,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T17:51:47.629Z",
          "iopub.execute_input": "2021-09-07T17:51:47.639Z",
          "shell.execute_reply": "2021-09-07T17:51:47.696Z",
          "iopub.status.idle": "2021-09-07T17:51:47.706Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score\n",
        "\n",
        "\n",
        "print('Precision: %.3f' % precision_score(y_test, prediction))\n",
        "print('Recall: %.3f' % recall_score(y_test, prediction))\n",
        "print('Accuracy: %.3f' % accuracy_score(y_test, prediction))\n",
        "print('F1 Score: %.3f' % f1_score(y_test, prediction))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Precision: 0.479\n",
            "Recall: 0.857\n",
            "Accuracy: 0.474\n",
            "F1 Score: 0.614\n"
          ]
        }
      ],
      "execution_count": 39,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-09-07T18:01:03.240Z",
          "iopub.execute_input": "2021-09-07T18:01:03.251Z",
          "iopub.status.idle": "2021-09-07T18:01:03.278Z",
          "shell.execute_reply": "2021-09-07T18:01:03.295Z"
        }
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "kernelspec": {
      "display_name": "Python 3 (Spyder)",
      "language": "python3",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.13",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "nteract": {
      "version": "0.28.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}